#!/usr/bin/env python
# -*- coding:utf-8 -*-
# author:sirian
# datetime:2018/11/8 11:18
# software: PyCharm

import urllib2
import json
import jsonpath

# Fetch the JSON document at `url`, pull every "name" value out of it
# (the city names, per the original author's notes) and save them to
# lagou_city.txt as a UTF-8 encoded JSON array.
url = "https://www.lagou.com/lbs/getAllCitySearchLabels.json"

# Browser-like request headers sent along with the request.
ua = {
    "Connection": "keep-alive",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
    "Accept-Language": "zh-CN,zh;q=0.9",
}

request = urllib2.Request(url, headers=ua)
response = urllib2.urlopen(request)
try:
    html = response.read()
finally:
    # Release the underlying connection even if the read fails.
    response.close()

# Parse the JSON text into Python objects.
unicodestr = json.loads(html)

# Collect every "name" value found at any depth of the document.
# jsonpath() returns False (not an empty list) when nothing matches, so
# fall back to [] to avoid writing the literal "false" to the output file.
city_list = jsonpath.jsonpath(unicodestr, "$..name") or []

# dumps() escapes non-ASCII characters by default (ensure_ascii=True).
# Disabling it keeps the Chinese text readable; the result may then be a
# unicode string, which is why it is encoded explicitly before writing.
array = json.dumps(city_list, ensure_ascii=False)

# Write the serialized list to disk as UTF-8 bytes.
with open("lagou_city.txt", "w") as f:
    f.write(array.encode("utf-8"))